# Importing the dataset
# Read the Marvel and DC character tables from CSV files in ../data
# (paths are relative to the working directory).
marvel <- read.csv("../data/marvel.csv")
dc <- read.csv("../data/dc.csv")
# Preview the first rows of the Marvel table
head(marvel)
## page_id name
## 1 1678 Spider-Man (Peter Parker)
## 2 7139 Captain America (Steven Rogers)
## 3 64786 Wolverine (James \\"Logan\\" Howlett)
## 4 1868 Iron Man (Anthony \\"Tony\\" Stark)
## 5 2460 Thor (Thor Odinson)
## 6 2458 Benjamin Grimm (Earth-616)
## urlslug ID ALIGN
## 1 \\/Spider-Man_(Peter_Parker) Secret Identity Good Characters
## 2 \\/Captain_America_(Steven_Rogers) Public Identity Good Characters
## 3 \\/Wolverine_(James_%22Logan%22_Howlett) Public Identity Neutral Characters
## 4 \\/Iron_Man_(Anthony_%22Tony%22_Stark) Public Identity Good Characters
## 5 \\/Thor_(Thor_Odinson) No Dual Identity Good Characters
## 6 \\/Benjamin_Grimm_(Earth-616) Public Identity Good Characters
## EYE HAIR SEX GSM ALIVE APPEARANCES
## 1 Hazel Eyes Brown Hair Male Characters Living Characters 4043
## 2 Blue Eyes White Hair Male Characters Living Characters 3360
## 3 Blue Eyes Black Hair Male Characters Living Characters 3061
## 4 Blue Eyes Black Hair Male Characters Living Characters 2961
## 5 Blue Eyes Blond Hair Male Characters Living Characters 2258
## 6 Blue Eyes No Hair Male Characters Living Characters 2255
## FIRST.APPEARANCE Year
## 1 Aug-62 1962
## 2 Mar-41 1941
## 3 Oct-74 1974
## 4 Mar-63 1963
## 5 Nov-50 1950
## 6 Nov-61 1961
# Preview the first rows of the DC table
head(dc)
## page_id name urlslug
## 1 1422 Batman (Bruce Wayne) \\/wiki\\/Batman_(Bruce_Wayne)
## 2 23387 Superman (Clark Kent) \\/wiki\\/Superman_(Clark_Kent)
## 3 1458 Green Lantern (Hal Jordan) \\/wiki\\/Green_Lantern_(Hal_Jordan)
## 4 1659 James Gordon (New Earth) \\/wiki\\/James_Gordon_(New_Earth)
## 5 1576 Richard Grayson (New Earth) \\/wiki\\/Richard_Grayson_(New_Earth)
## 6 1448 Wonder Woman (Diana Prince) \\/wiki\\/Wonder_Woman_(Diana_Prince)
## ID ALIGN EYE HAIR SEX GSM
## 1 Secret Identity Good Characters Blue Eyes Black Hair Male Characters
## 2 Secret Identity Good Characters Blue Eyes Black Hair Male Characters
## 3 Secret Identity Good Characters Brown Eyes Brown Hair Male Characters
## 4 Public Identity Good Characters Brown Eyes White Hair Male Characters
## 5 Secret Identity Good Characters Blue Eyes Black Hair Male Characters
## 6 Public Identity Good Characters Blue Eyes Black Hair Female Characters
## ALIVE APPEARANCES FIRST.APPEARANCE YEAR
## 1 Living Characters 3093 1939, May 1939
## 2 Living Characters 2496 1986, October 1986
## 3 Living Characters 1565 1959, October 1959
## 4 Living Characters 1316 1987, February 1987
## 5 Living Characters 1237 1940, April 1940
## 6 Living Characters 1231 1941, December 1941
# Report the number of rows and columns of each table
print(dim(marvel))
## [1] 16376 13
print(dim(dc))
## [1] 6896 13
# Per-column summary of the Marvel table (type, range, NA count)
summary(marvel)
## page_id name urlslug ID
## Min. : 1025 Length:16376 Length:16376 Length:16376
## 1st Qu.: 28310 Class :character Class :character Class :character
## Median :282578 Mode :character Mode :character Mode :character
## Mean :300232
## 3rd Qu.:509077
## Max. :755278
##
## ALIGN EYE HAIR SEX
## Length:16376 Length:16376 Length:16376 Length:16376
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## GSM ALIVE APPEARANCES FIRST.APPEARANCE
## Length:16376 Length:16376 Min. : 1.00 Length:16376
## Class :character Class :character 1st Qu.: 1.00 Class :character
## Mode :character Mode :character Median : 3.00 Mode :character
## Mean : 17.03
## 3rd Qu.: 8.00
## Max. :4043.00
## NA's :1096
## Year
## Min. :1939
## 1st Qu.:1974
## Median :1990
## Mean :1985
## 3rd Qu.:2000
## Max. :2013
## NA's :815
# Per-column summary of the DC table (type, range, NA count)
summary(dc)
## page_id name urlslug ID
## Min. : 1380 Length:6896 Length:6896 Length:6896
## 1st Qu.: 44106 Class :character Class :character Class :character
## Median :141267 Mode :character Mode :character Mode :character
## Mean :147441
## 3rd Qu.:213203
## Max. :404010
##
## ALIGN EYE HAIR SEX
## Length:6896 Length:6896 Length:6896 Length:6896
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## GSM ALIVE APPEARANCES FIRST.APPEARANCE
## Length:6896 Length:6896 Min. : 1.00 Length:6896
## Class :character Class :character 1st Qu.: 2.00 Class :character
## Mode :character Mode :character Median : 6.00 Mode :character
## Mean : 23.63
## 3rd Qu.: 15.00
## Max. :3093.00
## NA's :355
## YEAR
## Min. :1935
## 1st Qu.:1983
## Median :1992
## Mean :1990
## 3rd Qu.:2003
## Max. :2013
## NA's :69
# Cleaning the datasets
# Count NA values per column in the Marvel table. Only true NAs are counted;
# empty strings in character columns are not.
colSums(is.na(marvel))
## page_id name urlslug ID
## 0 0 0 0
## ALIGN EYE HAIR SEX
## 0 0 0 0
## GSM ALIVE APPEARANCES FIRST.APPEARANCE
## 0 0 1096 0
## Year
## 815
# Count NA values per column in the DC table (empty strings are not counted)
colSums(is.na(dc))
## page_id name urlslug ID
## 0 0 0 0
## ALIGN EYE HAIR SEX
## 0 0 0 0
## GSM ALIVE APPEARANCES FIRST.APPEARANCE
## 0 0 355 0
## YEAR
## 69
# Drop the identifier / bookkeeping columns that the analysis does not use.
# `all_of()` states explicitly that `cols` is an external character vector
# (passing a bare vector to `select()` is deprecated since tidyselect 1.1.0)
# and raises an error if any listed column is absent.
cols <- c("page_id", "urlslug", "GSM", "Year")
marvel <- marvel %>% select(-all_of(cols))
# The DC table spells its year column in upper case ("YEAR").
cols <- c("page_id", "urlslug", "GSM", "YEAR")
dc <- dc %>% select(-all_of(cols))
# Preview the trimmed Marvel table
head(marvel)
## name ID ALIGN
## 1 Spider-Man (Peter Parker) Secret Identity Good Characters
## 2 Captain America (Steven Rogers) Public Identity Good Characters
## 3 Wolverine (James \\"Logan\\" Howlett) Public Identity Neutral Characters
## 4 Iron Man (Anthony \\"Tony\\" Stark) Public Identity Good Characters
## 5 Thor (Thor Odinson) No Dual Identity Good Characters
## 6 Benjamin Grimm (Earth-616) Public Identity Good Characters
## EYE HAIR SEX ALIVE APPEARANCES
## 1 Hazel Eyes Brown Hair Male Characters Living Characters 4043
## 2 Blue Eyes White Hair Male Characters Living Characters 3360
## 3 Blue Eyes Black Hair Male Characters Living Characters 3061
## 4 Blue Eyes Black Hair Male Characters Living Characters 2961
## 5 Blue Eyes Blond Hair Male Characters Living Characters 2258
## 6 Blue Eyes No Hair Male Characters Living Characters 2255
## FIRST.APPEARANCE
## 1 Aug-62
## 2 Mar-41
## 3 Oct-74
## 4 Mar-63
## 5 Nov-50
## 6 Nov-61
# Preview the trimmed DC table
head(dc)
## name ID ALIGN EYE
## 1 Batman (Bruce Wayne) Secret Identity Good Characters Blue Eyes
## 2 Superman (Clark Kent) Secret Identity Good Characters Blue Eyes
## 3 Green Lantern (Hal Jordan) Secret Identity Good Characters Brown Eyes
## 4 James Gordon (New Earth) Public Identity Good Characters Brown Eyes
## 5 Richard Grayson (New Earth) Secret Identity Good Characters Blue Eyes
## 6 Wonder Woman (Diana Prince) Public Identity Good Characters Blue Eyes
## HAIR SEX ALIVE APPEARANCES FIRST.APPEARANCE
## 1 Black Hair Male Characters Living Characters 3093 1939, May
## 2 Black Hair Male Characters Living Characters 2496 1986, October
## 3 Brown Hair Male Characters Living Characters 1565 1959, October
## 4 White Hair Male Characters Living Characters 1316 1987, February
## 5 Black Hair Male Characters Living Characters 1237 1940, April
## 6 Black Hair Female Characters Living Characters 1231 1941, December
# Drop characters whose first appearance is unknown.
# FIRST.APPEARANCE contains no NA values (the NA count above is 0); the
# missing entries are blank strings, so `drop_na()` would not remove anything.
# Filter on blank strings explicitly (NA rows are dropped as well).
marvel <- marvel %>%
  filter(!is.na(FIRST.APPEARANCE), trimws(FIRST.APPEARANCE) != "")
dc <- dc %>%
  filter(!is.na(FIRST.APPEARANCE), trimws(FIRST.APPEARANCE) != "")
# Marvel encodes the first appearance as "Mon-YY" (e.g. "Aug-62").
marvel <- marvel %>%
  separate(FIRST.APPEARANCE, into = c("MONTH", "YEAR"), sep = "-")
# DC encodes it as "YYYY, Month" (e.g. "1939, May"). Entries without a
# ", Month" part keep their YEAR and get MONTH = NA; `fill = "right"` makes
# that explicit instead of emitting a warning.
dc <- dc %>%
  separate(
    FIRST.APPEARANCE,
    into = c("YEAR", "MONTH"),
    sep = ", ",
    fill = "right"
  )
# Expand Marvel's two-digit year to four digits: values above 21 are read as
# 19xx, all others as 20xx. A missing year stays NA because ifelse()
# propagates an NA test.
marvel$YEAR <- with(
  marvel,
  ifelse(
    as.integer(YEAR) > 21,
    paste0("19", YEAR),
    paste0("20", YEAR)
  )
)
# Check the result
head(marvel, n = 6L)
## name ID ALIGN
## 1 Spider-Man (Peter Parker) Secret Identity Good Characters
## 2 Captain America (Steven Rogers) Public Identity Good Characters
## 3 Wolverine (James \\"Logan\\" Howlett) Public Identity Neutral Characters
## 4 Iron Man (Anthony \\"Tony\\" Stark) Public Identity Good Characters
## 5 Thor (Thor Odinson) No Dual Identity Good Characters
## 6 Benjamin Grimm (Earth-616) Public Identity Good Characters
## EYE HAIR SEX ALIVE APPEARANCES MONTH
## 1 Hazel Eyes Brown Hair Male Characters Living Characters 4043 Aug
## 2 Blue Eyes White Hair Male Characters Living Characters 3360 Mar
## 3 Blue Eyes Black Hair Male Characters Living Characters 3061 Oct
## 4 Blue Eyes Black Hair Male Characters Living Characters 2961 Mar
## 5 Blue Eyes Blond Hair Male Characters Living Characters 2258 Nov
## 6 Blue Eyes No Hair Male Characters Living Characters 2255 Nov
## YEAR
## 1 1962
## 2 1941
## 3 1974
## 4 1963
## 5 1950
## 6 1961
# Replace three-letter month abbreviations with full month names.
# The lookup is built from R's built-in month.abb / month.name constants and
# spliced into recode(); values that match no abbreviation (and NA) are left
# unchanged, and "May" maps to itself.
month_key <- setNames(month.name, month.abb)
marvel$MONTH <- recode(marvel$MONTH, !!!month_key)
# Check the result
head(marvel, n = 6L)
## name ID ALIGN
## 1 Spider-Man (Peter Parker) Secret Identity Good Characters
## 2 Captain America (Steven Rogers) Public Identity Good Characters
## 3 Wolverine (James \\"Logan\\" Howlett) Public Identity Neutral Characters
## 4 Iron Man (Anthony \\"Tony\\" Stark) Public Identity Good Characters
## 5 Thor (Thor Odinson) No Dual Identity Good Characters
## 6 Benjamin Grimm (Earth-616) Public Identity Good Characters
## EYE HAIR SEX ALIVE APPEARANCES MONTH
## 1 Hazel Eyes Brown Hair Male Characters Living Characters 4043 August
## 2 Blue Eyes White Hair Male Characters Living Characters 3360 March
## 3 Blue Eyes Black Hair Male Characters Living Characters 3061 October
## 4 Blue Eyes Black Hair Male Characters Living Characters 2961 March
## 5 Blue Eyes Blond Hair Male Characters Living Characters 2258 November
## 6 Blue Eyes No Hair Male Characters Living Characters 2255 November
## YEAR
## 1 1962
## 2 1941
## 3 1974
## 4 1963
## 5 1950
## 6 1961
# Preview the cleaned DC table
head(dc)
## name ID ALIGN EYE
## 1 Batman (Bruce Wayne) Secret Identity Good Characters Blue Eyes
## 2 Superman (Clark Kent) Secret Identity Good Characters Blue Eyes
## 3 Green Lantern (Hal Jordan) Secret Identity Good Characters Brown Eyes
## 4 James Gordon (New Earth) Public Identity Good Characters Brown Eyes
## 5 Richard Grayson (New Earth) Secret Identity Good Characters Blue Eyes
## 6 Wonder Woman (Diana Prince) Public Identity Good Characters Blue Eyes
## HAIR SEX ALIVE APPEARANCES YEAR MONTH
## 1 Black Hair Male Characters Living Characters 3093 1939 May
## 2 Black Hair Male Characters Living Characters 2496 1986 October
## 3 Brown Hair Male Characters Living Characters 1565 1959 October
## 4 White Hair Male Characters Living Characters 1316 1987 February
## 5 Black Hair Male Characters Living Characters 1237 1940 April
## 6 Black Hair Female Characters Living Characters 1231 1941 December
This code snippet is part of a project that analyzes the introduction of characters over time in Marvel and DC comics.
The first two lines convert the “YEAR” column from characters to numeric values in both the Marvel and DC data frames.
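The next four lines sort the Marvel and DC data frames by year. Because `order()` is called with `na.last = NA`, rows with a missing year are removed from the sorted copies rather than placed last.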
Finally, the code prints the year of the first character introduced in Marvel and DC comics using the cat function. The output will be a message that shows the year of the first character introduced in Marvel and DC comics respectively.
# 1. Introduction of characters over time
# YEAR is still stored as character here; make it numeric so it sorts
# chronologically
marvel$YEAR <- as.numeric(marvel$YEAR)
dc$YEAR <- as.numeric(dc$YEAR)
# Order the MARVEL rows by year; na.last = NA removes rows with a missing year
marvel_sorted <- marvel[order(marvel[["YEAR"]], na.last = NA), , drop = FALSE]
# The first row after sorting holds the earliest MARVEL year
cat("The first character of MARVEL appeared in the year ", marvel_sorted[["YEAR"]][1], "\n")
## The first character of MARVEL appeared in the year 1939
# Order the DC rows by year; na.last = NA removes rows with a missing year
dc_sorted <- dc[order(dc[["YEAR"]], na.last = NA), , drop = FALSE]
# The first row after sorting holds the earliest DC year
cat("The first character of DC appeared in the year ", dc_sorted[["YEAR"]][1], "\n")
## The first character of DC appeared in the year 1935
This code snippet creates a density plot that shows the distribution of appearance of heroes in comic books over time for both Marvel and DC.
The first two lines define a ggplot object and specify the data frame and aesthetics for the plot. Two geom_density layers are added to the plot to create the density curves for Marvel and DC data. The fill parameter inside the aes function specifies the fill color of each density curve.
The alpha parameter controls the transparency of the density curves, with a value of 0.5 indicating that the curves are semi-transparent. The color parameter sets the color of the outline of each density curve.
The labs function call adds a title to the plot. The theme_minimal function call changes the theme of the plot to a minimalist style.
Finally, the scale_fill_manual function call sets the fill color of the density curves to red for Marvel and blue for DC.
# Overlaid density curves of first-appearance year for both publishers.
# Each layer gets its own data frame instead of `df$col` inside aes(), and the
# fill values are *named*: an unnamed c("red", "blue") is matched to the fill
# levels in alphabetical order ("DC", "Marvel"), which would paint DC red and
# Marvel blue, the opposite of the outline colours.
ggplot() +
  geom_density(
    data = marvel,
    aes(x = as.numeric(YEAR), fill = "Marvel"),
    alpha = 0.5,
    color = "red"
  ) +
  geom_density(
    data = dc,
    aes(x = as.numeric(YEAR), fill = "DC"),
    alpha = 0.5,
    color = "blue"
  ) +
  labs(
    title = "Distribution of Appearance of heroes in comic in years",
    x = "Year",
    fill = "Publisher"
  ) +
  theme_minimal() +
  scale_fill_manual(values = c(Marvel = "red", DC = "blue"))
## Warning: Removed 815 rows containing non-finite values (`stat_density()`).
## Warning: Removed 69 rows containing non-finite values (`stat_density()`).
This code snippet creates a grid of two density plots for each gender category (female and male) that show the ratio of characters created over time for both Marvel and DC comics.
The first eight lines of code filter the Marvel and DC data frames by the value of SEX (female and male for both publishers, genderfluid and agender for Marvel, genderless and transgender for DC), creating four new data frames per publisher.
The gender_density_plot function takes four parameters: data, which specifies the data frame to use for the plot; var, which specifies the variable to use for the x-axis; label, which specifies the title of the plot; and color, which specifies the fill color of the density curve. This function creates a density plot for the specified gender category.
The options function call sets the size of the plot. The plot_grid function creates a grid of two plots for each gender category, one for Marvel and one for DC, using the gender_density_plot function. The geom_density layer is added to each plot to create the density curve for the corresponding data frame. The ncol and nrow parameters set the number of columns and rows for the grid layout.
# Split each publisher's table by the value of SEX.
# Female / male subsets exist for both publishers.
marvel_female_characters <- filter(marvel, SEX == "Female Characters")
dc_female_characters <- filter(dc, SEX == "Female Characters")
marvel_male_characters <- filter(marvel, SEX == "Male Characters")
dc_male_characters <- filter(dc, SEX == "Male Characters")
# The remaining categories use different labels per publisher.
# Note that dc_gf_characters holds DC's "Genderless Characters".
marvel_gf_characters <- filter(marvel, SEX == "Genderfluid Characters")
dc_gf_characters <- filter(dc, SEX == "Genderless Characters")
marvel_ag_characters <- filter(marvel, SEX == "Agender Characters")
dc_tg_characters <- filter(dc, SEX == "Transgender Characters")
# Density plot of one column for a single group of characters.
#   data  : data frame containing the column named by `var`
#   var   : name of the column (a string) mapped to the x-axis
#   label : plot title
#   color : fill colour of the density curve
# Returns a ggplot object, so further layers can be added with `+`.
gender_density_plot <- function(data, var, label, color) {
  # `.data[[var]]` looks the column up by name and replaces aes_string(),
  # which is deprecated since ggplot2 3.0.0.
  ggplot(data = data, aes(x = .data[[var]])) +
    geom_density(fill = color, alpha = 0.5) +
    labs(title = label, x = "Year") +
    theme_minimal() +
    # Decrease the font size of the title
    theme(plot.title = element_text(size = rel(0.8)))
}
# Increase the plot size
options(repr.plot.width = 30, repr.plot.height = 8)
# One panel per gender: the Marvel density (red) comes from
# gender_density_plot(); the DC density (blue) is added as an extra layer.
plot_grid(
  gender_density_plot(
    data = marvel_female_characters,
    var = "YEAR",
    label = "Ratio of Female characters created over the years - Marvel",
    color = "red"
  ) +
    geom_density(
      data = dc_female_characters,
      mapping = aes(x = as.numeric(YEAR)),
      fill = "blue",
      alpha = 0.5
    ),
  gender_density_plot(
    data = marvel_male_characters,
    var = "YEAR",
    label = "Ratio of Male characters created over the years - Marvel",
    color = "red"
  ) +
    geom_density(
      data = dc_male_characters,
      mapping = aes(x = as.numeric(YEAR)),
      fill = "blue",
      alpha = 0.5
    ),
  nrow = 1,
  ncol = 2
)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation ideoms with `aes()`
## Warning: Removed 209 rows containing non-finite values (`stat_density()`).
## Warning: Removed 20 rows containing non-finite values (`stat_density()`).
## Warning: Removed 538 rows containing non-finite values (`stat_density()`).
## Warning: Removed 48 rows containing non-finite values (`stat_density()`).
This code snippet continues the analysis of the introduction of characters over time for Marvel and DC comics by creating four density plots for each gender category (female, male, agender/genderless, and genderfluid/transgender) that show the ratio of characters created over time for both publishers.
The first eight lines of code add a new column called “Publisher” to each of the filtered data frames for each gender category, and assign the corresponding publisher to each data frame.
The gender_density_plot function now takes two data frame parameters, data1 and data2, for the two publishers being compared. The rbind function is used to combine the two data frames into a single data frame for plotting.
The scale_fill_manual function call sets the fill colors of the density curves for each publisher to red for Marvel and blue for DC. The name parameter sets the title of the legend to “Publisher”, and the labels parameter sets the label names for each fill color.
The options function call sets the size of the plot. Four calls to the gender_density_plot function are made, one per gender category, and each call overlays the Marvel and DC densities in a single plot. The output will be four separate plots (printed one after another, not arranged in a grid) that show the ratio of characters created over time for each gender category, comparing Marvel and DC.
# Tag every gender subset with its publisher so that Marvel and DC rows can be
# told apart once the subsets are stacked together.
marvel_female_characters[["Publisher"]] <- "Marvel"
dc_female_characters[["Publisher"]] <- "DC"
marvel_male_characters[["Publisher"]] <- "Marvel"
dc_male_characters[["Publisher"]] <- "DC"
marvel_ag_characters[["Publisher"]] <- "Marvel"
dc_gf_characters[["Publisher"]] <- "DC"
marvel_gf_characters[["Publisher"]] <- "Marvel"
dc_tg_characters[["Publisher"]] <- "DC"
# Overlaid density plot comparing two publishers for one gender category.
#   data1, data2 : data frames with the same columns, including `Publisher`
#                  (values "Marvel" / "DC") and the column named by `var`
#   var          : name of the column (a string) mapped to the x-axis
#   label        : plot title
#   color1       : fill colour used for Publisher == "Marvel"
#   color2       : fill colour used for Publisher == "DC"
# Returns a ggplot object.
gender_density_plot <- function(data1, data2, var, label, color1, color2) {
  # rbind() matches data-frame columns by name, so the column order of the two
  # inputs does not have to agree.
  combined_data <- rbind(data1, data2)
  # `.data[[var]]` replaces aes_string(), deprecated since ggplot2 3.0.0.
  ggplot(data = combined_data, aes(x = .data[[var]], fill = Publisher)) +
    geom_density(alpha = 0.5) +
    labs(title = label, x = "Year") +
    # Colours are bound to publishers by name. The legend order is fixed with
    # `breaks` rather than by passing `labels = c("Marvel", "DC")`: labels are
    # applied positionally to the (alphabetically ordered) breaks, which
    # labelled the DC curve "Marvel" and vice versa.
    scale_fill_manual(
      values = c(Marvel = color1, DC = color2),
      breaks = c("Marvel", "DC"),
      name = "Publisher"
    ) +
    theme_minimal() +
    theme(plot.title = element_text(size = rel(0.8)))
}
# Plot size for the four comparison charts
options(repr.plot.width = 12, repr.plot.height = 8)
# Female characters: Marvel vs DC
female_plot <- gender_density_plot(
  data1 = marvel_female_characters,
  data2 = dc_female_characters,
  var = "YEAR",
  label = "Ratio of Female characters created over the years",
  color1 = "red",
  color2 = "blue"
)
print(female_plot)
## Warning: Removed 229 rows containing non-finite values (`stat_density()`).
# Male characters: Marvel vs DC
male_plot <- gender_density_plot(
  data1 = marvel_male_characters,
  data2 = dc_male_characters,
  var = "YEAR",
  label = "Ratio of Male characters created over the years",
  color1 = "red",
  color2 = "blue"
)
print(male_plot)
## Warning: Removed 586 rows containing non-finite values (`stat_density()`).
# Marvel "Agender" vs DC "Genderless" (dc_gf_characters holds the latter)
ag_plot <- gender_density_plot(
  data1 = marvel_ag_characters,
  data2 = dc_gf_characters,
  var = "YEAR",
  label = "Ratio of Agender/Genderless characters created over the years",
  color1 = "red",
  color2 = "blue"
)
print(ag_plot)
## Warning: Removed 9 rows containing non-finite values (`stat_density()`).
# Marvel "Genderfluid" vs DC "Transgender"
gf_plot <- gender_density_plot(
  data1 = marvel_gf_characters,
  data2 = dc_tg_characters,
  var = "YEAR",
  label = "Ratio of Genderfluid/Transgender characters created over the years",
  color1 = "red",
  color2 = "blue"
)
print(gf_plot)
## Warning: Groups with fewer than two data points have been dropped.
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
# When was the first female character introduced?
# Earliest first-appearance year per subset; na.rm = TRUE skips missing years.
# Female characters, DC then Marvel
min(dc_female_characters$YEAR, na.rm = TRUE)
## [1] 1936
min(marvel_female_characters$YEAR, na.rm = TRUE)
## [1] 1939
# Marvel "Genderfluid Characters"
min(marvel_gf_characters$YEAR, na.rm = TRUE)
## [1] 1949
# DC "Genderless Characters" (despite the _gf_ in the variable name)
min(dc_gf_characters$YEAR, na.rm = TRUE)
## [1] 1961
# Marvel "Agender Characters"
min(marvel_ag_characters$YEAR, na.rm = TRUE)
## [1] 1964
# DC "Transgender Characters"
min(dc_tg_characters$YEAR, na.rm = TRUE)
## [1] 2009
This code snippet creates two bar plots that show the number of first appearances of characters in Marvel and DC comics by year.
The first four lines of code use the %>% pipe operator to filter out missing values from the “YEAR” column in the Marvel data frame using the na.omit function, and count the number of characters that appeared in each year using the count function. This creates a new data frame that can be plotted.
The ggplot function is used to create a new plot object, and aes is used to specify the aesthetics for the plot. The geom_bar function is used to create a bar plot where the height of each bar represents the number of characters that appeared in each year.
The labs function call adds a title to the plot. The theme_minimal function call changes the theme of the plot to a minimalist style. The theme function is used to adjust the x-axis text angle to 90 degrees, making it easier to read the year labels.
The next four lines of code follow a similar pattern as the first four lines, but for the DC data frame instead. The output will be two separate bar plots that show the number of first appearances of characters in Marvel and DC comics by year.
# First Appearances by Year
# Only rows with a missing YEAR are excluded. `na.omit()` would drop every row
# that has an NA in *any* column (for example APPEARANCES) and therefore
# undercount the characters introduced in each year.
marvel %>%
  filter(!is.na(YEAR)) %>%
  count(YEAR) %>%
  ggplot(aes(x = as.factor(YEAR), y = n)) +
  # geom_col() draws bars from precomputed heights (same as stat = "identity")
  geom_col(fill = "red") +
  labs(title = "Marvel First Appearances by Year", x = "Year", y = "Characters") +
  theme_minimal() +
  # Rotate the year labels so they do not overlap
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
dc %>%
  filter(!is.na(YEAR)) %>%
  count(YEAR) %>%
  ggplot(aes(x = as.factor(YEAR), y = n)) +
  geom_col(fill = "blue") +
  labs(title = "DC First Appearances by Year", x = "Year", y = "Characters") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
This code snippet analyzes the gender diversity of characters in Marvel and DC comics by creating two pie charts.
The first two lines of code use the %>% pipe operator to count the number of characters for each gender category in the Marvel and DC data frames, and filter out empty values using the filter function. This creates two new data frames that can be plotted.
The plot_ly function from the plotly package is used to create a new plot object. The labels parameter is used to specify the labels for the slices of the pie chart, and the values parameter is used to specify the values for each slice. The type parameter is set to “pie” to create a pie chart, and the name parameter sets the name of the plot.
The layout function call adds a title to the plot. The fig object is printed to display the pie chart. The code creates two pie charts, one for Marvel and one for DC, that show the gender diversity of characters.
# 2. Gender proportionality
# Characters per SEX value, ignoring rows where SEX is blank
sex_m <- marvel %>%
  filter(SEX != "") %>%
  count(SEX)
sex_dc <- dc %>%
  filter(SEX != "") %>%
  count(SEX)
# Marvel pie chart
fig <- sex_m %>%
  plot_ly(labels = ~SEX, values = ~n, type = "pie", name = "Marvel") %>%
  layout(title = "Gender diversity in Marvel")
fig
# DC pie chart (reuses the `fig` variable)
fig <- sex_dc %>%
  plot_ly(labels = ~SEX, values = ~n, type = "pie", name = "DC") %>%
  layout(title = "Gender diversity in DC")
fig
This code snippet creates six bar plots that show the count of characters in each gender category (Male Characters, Female Characters, Genderfluid Characters, Agender Characters, and Transgender Characters) for different attributes (Alignment, Identity, and Living Status) in Marvel and DC comics.
The first two lines of code remove rows with empty values in the “SEX” column of the Marvel and DC data frames using the filter function.
The plot_count function is defined to create a bar plot that shows the count of characters for each gender category in a given data frame and attribute. The function takes five arguments: data (the data frame to plot), x_var (the attribute to plot on the x-axis), hue_var (the variable to group by and fill the bars with), title (the title of the plot), and palette (the color palette to use for the bars). The x-axis tick labels are customized inside the function with scale_x_discrete.
The ggplot function is used to create a new plot object, and aes_string is used to specify the aesthetics for the plot. The geom_bar function is used to create a bar plot where the height of each bar represents the count of characters for each gender category. The labs function call adds a title and axis labels to the plot. The theme function is used to adjust the appearance of the plot. The scale_fill_manual function is used to customize the colors of the bars.
The last six lines of code call the plot_count function with different arguments to create six separate bar plots that show the count of characters in each gender category for different attributes in Marvel and DC comics. The print function is used to display the plots.
# Keep only characters with a recorded SEX value in both tables
# (rows where SEX is blank are discarded)
marvel <- filter(marvel, SEX != "")
dc <- filter(dc, SEX != "")
# Grouped (dodged) bar chart of row counts.
#   data    : data frame to plot
#   x_var   : name of the column (a string) shown on the x-axis
#   hue_var : name of the column (a string) used to group and fill the bars
#   title   : plot title
#   palette : vector of fill colours, one per level of `hue_var`
# Returns a ggplot object.
plot_count <- function(data, x_var, hue_var, title, palette) {
  # Tick labels are derived from the actual category values. A fixed vector
  # such as c("Good", "Bad", "Neutral", "Unknown") is applied positionally to
  # the alphabetically ordered levels, which mislabels ALIGN and is wrong for
  # any other x variable (ID, ALIVE). The " Characters" suffix is stripped and
  # blank / missing categories are shown as "Unknown".
  clean_labels <- function(x) {
    x <- sub(" Characters$", "", x)
    x[is.na(x) | x == ""] <- "Unknown"
    x
  }
  # `.data[[name]]` replaces aes_string(), deprecated since ggplot2 3.0.0.
  ggplot(data = data, aes(x = .data[[x_var]], fill = .data[[hue_var]])) +
    geom_bar(position = "dodge") +
    labs(title = title, x = x_var, y = "Count", fill = hue_var) +
    theme_bw() +
    theme(
      plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
      axis.title = element_text(size = 12, face = "bold"),
      axis.text = element_text(size = 10),
      legend.title = element_text(size = 12, face = "bold"),
      legend.text = element_text(size = 10),
      panel.grid.major = element_line(color = "grey", linetype = "dashed"),
      panel.grid.minor = element_blank(),
      panel.border = element_blank(),
      panel.background = element_blank()
    ) +
    scale_fill_manual(values = palette) +
    scale_x_discrete(labels = clean_labels)
}
# SEX broken down by alignment, identity type and living status, once per
# publisher. All six charts use the same five-colour palette.
# Alignment: Marvel
plot1 <- plot_count(
  data = marvel, x_var = "ALIGN", hue_var = "SEX",
  title = "Marvel Sex vs Align",
  palette = c("red", "blue", "green", "orange", "purple")
)
print(plot1)
# Alignment: DC
plot2 <- plot_count(
  data = dc, x_var = "ALIGN", hue_var = "SEX",
  title = "DC Sex vs Align",
  palette = c("red", "blue", "green", "orange", "purple")
)
print(plot2)
# Identity: Marvel
plot3 <- plot_count(
  data = marvel, x_var = "ID", hue_var = "SEX",
  title = "Marvel Sex vs Identity",
  palette = c("red", "blue", "green", "orange", "purple")
)
print(plot3)
# Identity: DC
plot4 <- plot_count(
  data = dc, x_var = "ID", hue_var = "SEX",
  title = "DC Sex vs Identity",
  palette = c("red", "blue", "green", "orange", "purple")
)
print(plot4)
# Living status: Marvel
plot5 <- plot_count(
  data = marvel, x_var = "ALIVE", hue_var = "SEX",
  title = "Marvel Sex vs Living status",
  palette = c("red", "blue", "green", "orange", "purple")
)
print(plot5)
# Living status: DC
plot6 <- plot_count(
  data = dc, x_var = "ALIVE", hue_var = "SEX",
  title = "DC Sex vs Living status",
  palette = c("red", "blue", "green", "orange", "purple")
)
print(plot6)
This code defines a function called top_10_pie_plotly that creates pie charts using the plotly library. It takes as inputs a data frame (df), a column containing the labels for the pie chart (labels), a column containing the values for the pie chart (values), and a title for the chart (title).
The function is then used to create three pie charts: one for the top 10 Marvel characters by appearances, one for the top 10 DC characters by appearances, and one for the top 10 characters overall (combining both Marvel and DC). The code selects the top 10 characters for each data frame using arrange and head functions.
# Build a plotly pie chart.
#   df     : data frame handed to plot_ly()
#   labels : slice labels
#   values : slice sizes
#   title  : chart title
# Returns the plotly object.
top_10_pie_plotly <- function(df, labels, values, title) {
  layout(
    plot_ly(df, labels = labels, values = values, type = "pie"),
    title = title
  )
}
# Ten Marvel characters with the highest APPEARANCES
top_10_appearances_m <- marvel %>%
  arrange(desc(APPEARANCES)) %>%
  head(n = 10)
top_10_pie_plotly(
  df = top_10_appearances_m,
  labels = top_10_appearances_m$name,
  values = top_10_appearances_m$APPEARANCES,
  title = "Top 10 Marvel Characters by Appearances"
)
# Ten DC characters with the highest APPEARANCES
top_10_appearances_dc <- dc %>%
  arrange(desc(APPEARANCES)) %>%
  head(n = 10)
top_10_pie_plotly(
  df = top_10_appearances_dc,
  labels = top_10_appearances_dc$name,
  values = top_10_appearances_dc$APPEARANCES,
  title = "Top 10 DC Characters by Appearances"
)
# Stack both publishers (DC rows first) into a single table
dc_marvel <- rbind(dc, marvel)
# Ten characters with the highest APPEARANCES across both publishers
top_10_appearances_dc_marvel <- dc_marvel %>%
  arrange(desc(APPEARANCES)) %>%
  head(n = 10)
top_10_pie_plotly(
  df = top_10_appearances_dc_marvel,
  labels = top_10_appearances_dc_marvel$name,
  values = top_10_appearances_dc_marvel$APPEARANCES,
  title = "Top 10 DC and Marvel Characters by Appearances"
)